In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [ ]:
# Load the raw survey export (Google Form responses) into a DataFrame.
df=pd.read_csv("CAC 2 (Responses) - Form Responses 1.csv")
In [ ]:
# Preview the first rows of the raw responses.
df.head()
Out[ ]:
Timestamp Course State Gender Overall how would you rate your mental health?\n Is there any history of mental health disorder in your family?\n On average, how many hours do you sleep per day?\n Have you ever used the counselling service provided by the college? Family Structure Is this the first time you're staying away from home? How is prayer and meditation important in your daily life? On average, how stressed are you? How long have you been in Lavasa? What strategies do you use to cope with stress and anxiety in your daily life?
0 10/10/2023 21:44:21 MSc DS Kerala Male 4 No 4-6 hours No Joint Family Yes 4 4 Less than 6 months Meditation
1 10/10/2023 22:14:11 MSc DS Kerala Female 2 No Less than 4 hours No Nuclear Family No 3 4 Less than 6 months Listening music
2 10/10/2023 22:15:16 MSc DS Uttar Pradesh Female 4 No 4-6 hours No Nuclear Family No 3 4 Less than 6 months Listening music
3 10/10/2023 22:19:15 MSc DS Kerala Female 2 No 7-8 hours Yes Nuclear Family No 3 4 Less than 6 months Sleeping
4 10/10/2023 22:28:00 MSc DS Other Male 3 No 4-6 hours No Nuclear Family No 5 4 Less than 6 months Sports
In [ ]:
# Preview the last rows of the raw responses.
df.tail()
Out[ ]:
Timestamp Course State Gender Overall how would you rate your mental health?\n Is there any history of mental health disorder in your family?\n On average, how many hours do you sleep per day?\n Have you ever used the counselling service provided by the college? Family Structure Is this the first time you're staying away from home? How is prayer and meditation important in your daily life? On average, how stressed are you? How long have you been in Lavasa? What strategies do you use to cope with stress and anxiety in your daily life?
310 10/24/2023 14:33:02 MBA Kerala Male 4 No Less than 4 hours No Nuclear Family Yes 3 4 Less than 6 months Sleeping
311 10/24/2023 14:33:26 MBA Kerala Female 4 No Less than 4 hours No Joint Family No 3 4 Less than 6 months Listening music
312 10/24/2023 14:35:22 MBA Kerala Female 4 No Less than 4 hours Yes Joint Family No 2 5 Less than 6 months Sleeping
313 10/24/2023 14:36:03 MBA Tamil Nadu Female 4 No Less than 4 hours No Joint Family No 3 4 Less than 6 months Listening music
314 10/24/2023 14:36:55 MBA West Bengal Male 4 No 4-6 hours No Joint Family Yes 4 4 Less than 6 months Sports
In [ ]:
# List the raw column names (the full survey questions) before renaming them.
df.columns
Out[ ]:
Index(['Timestamp', 'Course', 'State', 'Gender',
       'Overall how would you rate your mental health?\n',
       'Is there any history of mental health disorder in your family?\n',
       'On average, how many hours do you sleep per day?\n',
       'Have you ever used the counselling service provided by the college?',
       'Family Structure',
       'Is this the first time you're staying away from home?',
       'How is prayer and meditation important in your daily life?',
       'On average, how stressed are you? ',
       'How long have you been in Lavasa?',
       'What strategies do you use to cope with stress and anxiety in your daily life?'],
      dtype='object')
In [ ]:
# Discard the submission timestamp; it plays no part in the analysis.
df = df.drop(columns=["Timestamp"])

# Swap the long survey questions for short column names (assigned by position,
# so the order must match the columns left after dropping Timestamp).
df.columns = [
    "Course",
    "State",
    "Gender",
    "Mental Health Rating",
    "Family History",
    "Sleep Duration",
    "Counselling Service Usage",
    "Family Structure",
    "First Time Away From Home",
    "Importance of Prayer,Meditation",
    "Stress Score",
    "Length of Stay",
    "Coping Strategies",
]

# "Other" answers are beyond the scope of the study: mark them as missing so
# the dropna() below removes those rows (together with any other row that
# contains a missing value).
df["State"] = df["State"].replace("Other", np.nan)
df["Course"] = (
    df["Course"]
    .replace("Other", np.nan)
    # Abbreviate the long course name for ease of display.
    .replace("MSc Global Finance and Analytics", "Msc GFA")
)
df = df.dropna()
df.head(5)
Out[ ]:
Course State Gender Mental Health Rating Family History Sleep Duration Counselling Service Usage Family Structure First Time Away From Home Importance of Prayer,Meditation Stress Score Length of Stay Coping Strategies
0 MSc DS Kerala Male 4 No 4-6 hours No Joint Family Yes 4 4 Less than 6 months Meditation
1 MSc DS Kerala Female 2 No Less than 4 hours No Nuclear Family No 3 4 Less than 6 months Listening music
2 MSc DS Uttar Pradesh Female 4 No 4-6 hours No Nuclear Family No 3 4 Less than 6 months Listening music
3 MSc DS Kerala Female 2 No 7-8 hours Yes Nuclear Family No 3 4 Less than 6 months Sleeping
5 MSc DS West Bengal Female 1 No 4-6 hours No Nuclear Family Yes 3 5 Less than 6 months Watching movies/series
In [ ]:
# (rows, columns) remaining after cleaning.
df.shape
Out[ ]:
(303, 13)
In [ ]:
# Per-column summary: number of missing values and number of distinct values.
count_df = pd.DataFrame(
    {
        "Null Values": df.isnull().sum(),
        "Unique Values": df.nunique(),
    },
    index=df.columns,
)
count_df
Out[ ]:
Null Values Unique Values
Course 0 13
State 0 27
Gender 0 3
Mental Health Rating 0 5
Family History 0 2
Sleep Duration 0 4
Counselling Service Usage 0 2
Family Structure 0 2
First Time Away From Home 0 2
Importance of Prayer,Meditation 0 5
Stress Score 0 5
Length of Stay 0 3
Coping Strategies 0 7
In [ ]:
# Total number of cells (rows x columns) in the cleaned frame.
df.size
Out[ ]:
3939
In [ ]:
# Data type of each column.
df.dtypes
Out[ ]:
Course                             object
State                              object
Gender                             object
Mental Health Rating                int64
Family History                     object
Sleep Duration                     object
Counselling Service Usage          object
Family Structure                   object
First Time Away From Home          object
Importance of Prayer,Meditation     int64
Stress Score                        int64
Length of Stay                     object
Coping Strategies                  object
dtype: object
In [ ]:
# Summary of the index, columns, non-null counts, dtypes and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
Index: 303 entries, 0 to 314
Data columns (total 13 columns):
 #   Column                           Non-Null Count  Dtype 
---  ------                           --------------  ----- 
 0   Course                           303 non-null    object
 1   State                            303 non-null    object
 2   Gender                           303 non-null    object
 3   Mental Health Rating             303 non-null    int64 
 4   Family History                   303 non-null    object
 5   Sleep Duration                   303 non-null    object
 6   Counselling Service Usage        303 non-null    object
 7   Family Structure                 303 non-null    object
 8   First Time Away From Home        303 non-null    object
 9   Importance of Prayer,Meditation  303 non-null    int64 
 10  Stress Score                     303 non-null    int64 
 11  Length of Stay                   303 non-null    object
 12  Coping Strategies                303 non-null    object
dtypes: int64(3), object(10)
memory usage: 33.1+ KB
In [ ]:
# Descriptive statistics for the numeric (1-5 rating) columns.
df.describe()
Out[ ]:
Mental Health Rating Importance of Prayer,Meditation Stress Score
count 303.000000 303.000000 303.000000
mean 3.069307 3.224422 3.587459
std 1.144443 1.202562 1.147171
min 1.000000 1.000000 1.000000
25% 2.000000 2.000000 3.000000
50% 3.000000 3.000000 4.000000
75% 4.000000 4.000000 4.000000
max 5.000000 5.000000 5.000000

Observation: The 25th percentile of the Stress Score is 3, which means that roughly 75% of respondents report average or above-average stress.

In [ ]:
# Re-check the (rows, columns) of the cleaned frame before saving it.
df.shape
Out[ ]:
(303, 13)
In [ ]:
# Preview the first rows of the cleaned frame before saving it.
df.head()
Out[ ]:
Course State Gender Mental Health Rating Family History Sleep Duration Counselling Service Usage Family Structure First Time Away From Home Importance of Prayer,Meditation Stress Score Length of Stay Coping Strategies
0 MSc DS Kerala Male 4 No 4-6 hours No Joint Family Yes 4 4 Less than 6 months Meditation
1 MSc DS Kerala Female 2 No Less than 4 hours No Nuclear Family No 3 4 Less than 6 months Listening music
2 MSc DS Uttar Pradesh Female 4 No 4-6 hours No Nuclear Family No 3 4 Less than 6 months Listening music
3 MSc DS Kerala Female 2 No 7-8 hours Yes Nuclear Family No 3 4 Less than 6 months Sleeping
5 MSc DS West Bengal Female 1 No 4-6 hours No Nuclear Family Yes 3 5 Less than 6 months Watching movies/series
In [ ]:
# Persist the cleaned survey so later cells can reload a pristine copy.
# index=False keeps the row index out of the file; otherwise it is written as
# an unnamed first column and comes back as a spurious "Unnamed: 0" column
# when the file is read again with pd.read_csv.
df.to_csv("cleaned_data.csv", index=False)
In [ ]:
# Reload the cleaned data from disk as the starting point for the plots.
df=pd.read_csv("cleaned_data.csv")
In [ ]:
# Stress level distribution per course.
# (matplotlib and seaborn are already imported in the first cell.)

# Bucket the 1-5 stress score into three labelled levels for plotting.
df["Stress Score"] = df["Stress Score"].replace(
    {
        1: "Low_stress",
        2: "Low_stress",
        3: "Moderate_stress",
        4: "High_stress",
        5: "High_stress",
    }
)
# Abbreviate the long course name so the tick labels stay readable.
df["Course"] = df["Course"].replace("MSc Global Finance and Analytics", "Msc GFA")

plt.figure(figsize=(12, 6))
sns.countplot(x="Course", hue="Stress Score", data=df, palette="dark:red", stat="percent")
# The x-axis shows the courses; the stress level is encoded by the bar colour,
# so it belongs in the legend title rather than on the axis.
plt.xlabel("Course")
plt.ylabel("Percent")
plt.title("Comparison of stress over the courses")
plt.xticks(rotation=45)
plt.legend(title="Stress level")
plt.show()
No description has been provided for this image

Insight: Respondents in MBA, MSc DS, and BA LLB report the highest stress levels in the college, in that order (descending).

In [ ]:
# Start again from the cleaned data so both ratings are numeric (1-5).
df = pd.read_csv("cleaned_data.csv")

plt.figure(figsize=(10, 5))
# Jitter and transparency keep overlapping points on the 5x5 grid visible.
sns.stripplot(
    data=df,
    x="Mental Health Rating",
    y="Stress Score",
    size=5,
    jitter=0.2,
    alpha=0.7,
)
plt.xlabel("Mental Health Rating")
plt.ylabel("Stress Score")
plt.title("Mental Health Rating Vs Stress Score")
plt.show()
plt.close()
No description has been provided for this image

Observation: Many of the people who claimed to have good mental health also reported high stress levels. This suggests a lack of awareness, as they are unable to identify their own mental health situation.

In [ ]:
# Bucket the 1-5 mental health rating into three labelled groups.
df["Mental Health Rating"] = df["Mental Health Rating"].replace(
    {
        1: "Low_Mental_Health",
        2: "Low_Mental_Health",
        3: "Average_Mental_Health",
        4: "Good_Mental_Health",
        5: "Good_Mental_Health",
    }
)

# Count respondents per group and order the table from smallest to largest.
mental_health_count = df.value_counts(["Mental Health Rating"])
mental_health_frame = (
    mental_health_count
    .to_frame()
    .reset_index()
    .sort_values(by="count", ascending=True)
)
mental_health_frame
Out[ ]:
Mental Health Rating count
2 Average_Mental_Health 74
1 Low_Mental_Health 110
0 Good_Mental_Health 119
In [ ]:
# Wedge labels and sizes come from the mental health count table.
label_data = mental_health_frame["Mental Health Rating"]
count_data = mental_health_frame["count"]

plt.pie(
    count_data,
    labels=label_data,
    autopct="%1.1f%%",
    wedgeprops={"linewidth": 2, "edgecolor": "white"},
    colors=["red", "darkgreen", "hotpink", "purple", "orange"],
    startangle=0,
)
plt.title("Mental Health Rating among Students")
plt.show()
No description has been provided for this image

Insight: Approx. 40% of respondents have good mental health

In [ ]:
# Bucket the 1-5 stress score into three labelled levels.
df["Stress Score"] = df["Stress Score"].replace(
    {
        1: "Low_stress",
        2: "Low_stress",
        3: "Moderate_stress",
        4: "High_stress",
        5: "High_stress",
    }
)

# Count respondents per level and order the table from smallest to largest.
stress_count = df.value_counts(["Stress Score"])
stress_frame = (
    stress_count
    .to_frame()
    .reset_index()
    .sort_values(by="count", ascending=True)
)
stress_frame
Out[ ]:
Stress Score count
2 Low_stress 55
1 Moderate_stress 71
0 High_stress 177
In [ ]:
# Wedge labels and sizes come from the stress level count table.
label_data = stress_frame["Stress Score"]
count_data = stress_frame["count"]

plt.pie(
    count_data,
    labels=label_data,
    autopct="%1.1f%%",
    wedgeprops={"linewidth": 2, "edgecolor": "white"},
    colors=["red", "darkgreen", "hotpink", "purple", "orange"],
    startangle=0,
)
plt.title("Stress Score among Students")
plt.show()
No description has been provided for this image

Insight: High Stress levels among students. Approx 58% of respondents report high stress.

In [ ]:
import plotly.express as px
In [ ]:
# Number of respondents for every (length of stay, stress level) combination,
# flattened into a regular frame with a "count" column for plotting.
df2 = df.value_counts(["Length of Stay", "Stress Score"])
df3 = df2.to_frame().reset_index()
In [ ]:
# %pip install nbformat 
In [ ]:
# Bubble chart: one bubble per (stress level, length of stay) pair, sized by
# the number of respondents in that pair.
fig = px.scatter(
    data_frame=df3,
    x="Stress Score",
    y="Length of Stay",
    size="count",
    size_max=60,
)
fig.update_layout(title_text="Length of stay Vs Stress", showlegend=True)

fig.show()

Insight: As one gets used to the place, their stress levels go down. Among the newcomers, 58% of respondents experienced high levels of stress, and 66% of respondents who have stayed here for more than 1 year experience high stress. However, only 30% of people who have been in Lavasa for more than 2 years experience high stress. Those who have been here for more than 2 years are in courses such as BA LLB; with more time, they are able to get accustomed to Lavasa. The other categories, however, include a large percentage of students in Master's courses, who don't have the luxury of time in Lavasa.

In [ ]:
# One point per respondent: counselling usage against mental health group.
sns.stripplot(
    data=df,
    x="Counselling Service Usage",
    y="Mental Health Rating",
    size=3,
    jitter=0.3,
)
plt.title("Mental Health Rating Vs Counselling Usage")
plt.xlabel("Utilization of Counselling")
plt.ylabel("Mental Health Rating")
plt.grid(True)
plt.show()
No description has been provided for this image

Insight: Irrespective of Mental health, a majority of respondents are not using the counselling services provided.

In [ ]:
# Share of respondents (in percent) per mental health group, split by whether
# they have used the college counselling service.
plt.figure(figsize=(5, 5))
sns.countplot(
    data=df,
    x="Mental Health Rating",
    hue="Counselling Service Usage",
    palette="dark",
    stat="percent",
)
plt.title("Mental Health Rating and using of Counselling service")
# Slight tilt so the long group labels do not overlap.
plt.xticks(rotation=10)
plt.show()
No description has been provided for this image
In [ ]:
# Bucket the 1-5 importance rating into three labelled groups.
df["Importance of Prayer,Meditation"] = df["Importance of Prayer,Meditation"].replace(
    {
        1: "Low_Importance",
        2: "Low_Importance",
        3: "Moderate_Importance",
        4: "High_Importance",
        5: "High_Importance",
    }
)

# Percentage of respondents per stress level, split by importance group.
sns.countplot(
    data=df,
    x="Stress Score",
    hue="Importance of Prayer,Meditation",
    stat="percent",
)
sns.despine(top=True)
plt.title("How Prayer, Meditation affects Stress")
plt.show()
No description has been provided for this image

Insight: Respondents who report high stress levels place higher importance on prayer and meditation.

In [ ]:
# Reload the cleaned data so the rating columns are numeric again.
df = pd.read_csv("cleaned_data.csv")

# Index by counselling usage, then keep only the respondents who answered "No".
g1 = df.set_index("Counselling Service Usage")
used_g_form = g1.loc[["No"]]
In [ ]:
# Distribution of the stress score among students who have NOT used the
# counselling service.
plt.figure(figsize=(5, 5))
# Plot the "Stress Score" column so the data matches the axis label and title
# (the mental health rating was being plotted here by mistake).
# Bin edges sit halfway between the integer scores so each bar is centred on
# one score from 1 to 5.
plt.hist(
    used_g_form["Stress Score"],
    edgecolor="white",
    bins=[0.5, 1.5, 2.5, 3.5, 4.5, 5.5],
    color="red",
)
plt.xlabel("Stress Score")
plt.ylabel("No. of Students not using Counselling Services")
plt.title("Stress Score among students not using Counselling service")
plt.show()
No description has been provided for this image

Insight: Among the people who are not using the services, there is a high amount of stress. Therefore, something is pulling the students away from seeking help.

In [ ]:
# Collapse the four sleep-duration answers into two groups.
# The result is assigned back to the column: calling
# df[col].replace(..., inplace=True) acts on an intermediate Series, which
# raises a chained-assignment warning in recent pandas and leaves df unchanged
# when Copy-on-Write is enabled.
df["Sleep Duration"] = df["Sleep Duration"].replace(
    {
        "Less than 4 hours": "Less than 6 hr",
        "4-6 hours": "Less than 6 hr",
        "7-8 hours": "6hr or more",
        "More than 8 hours": "6hr or more",
    }
)

# Number of respondents per mental health rating, split by sleep group.
plt.figure(figsize=(5, 5))
sns.countplot(hue="Sleep Duration", x="Mental Health Rating", data=df, palette="pastel")
sns.despine(top=True)
plt.title("Sleep Duration Vs Mental Health Rating")

plt.show()
No description has been provided for this image

Observation: Those who have best and worst mental health tend to have a good amount of sleep. For other categories data is inconclusive.

In [ ]:
# Number of respondents per stress score, split by sleep-duration group.
plt.figure(figsize=(5, 5))
sns.countplot(
    data=df,
    x="Stress Score",
    hue="Sleep Duration",
    palette="bright",
)
sns.despine(top=True, right=True)
plt.title("Sleep Duration and Stress")
plt.show()
No description has been provided for this image

Insight: Less sleep is associated with more stress. A possible explanation for the anomaly among those with a stress score of 5 is that people tend to escape through sleep.

In [ ]:
# One point per respondent: sleep-duration group against course, coloured by
# gender. Heavy jitter spreads the points within each category cell.
sns.stripplot(
    data=df,
    x="Sleep Duration",
    y="Course",
    hue="Gender",
    size=4,
    jitter=0.4,
    alpha=0.8,
)
plt.title("Influence of Course on Sleep Duration")
plt.xlabel("Sleep")
plt.ylabel("Courses")
plt.grid(True)
plt.xticks(rotation=45, fontsize=8)
plt.show()
No description has been provided for this image

Sleep is less in Msc Ds, MBA, BA LLB, BBA LLB

Insight: Students in MBA and MSc Data Science report high levels of stress compared to others. Overall, a large number of students report high levels of stress.

In [ ]:
# One point per respondent: gender against mental health rating.
sns.stripplot(
    data=df,
    x="Gender",
    y="Mental Health Rating",
    size=3,
    jitter=0.3,
    alpha=0.5,
)
plt.title("Gender Vs Mental Health Rating ")
plt.xlabel("Gender")
plt.ylabel("Mental Health Rating")
plt.show()
No description has been provided for this image
In [ ]:
# One point per respondent: gender against stress score.
sns.stripplot(
    data=df,
    x="Gender",
    y="Stress Score",
    size=3,
    jitter=0.3,
    alpha=0.5,
)
plt.title("Gender Vs Stress Level ")
plt.xlabel("Gender")
plt.ylabel("Stress Score")
plt.show()
No description has been provided for this image

Females are more stressed than males

In [ ]:
# One point per respondent: whether this is their first time away from home
# (x-axis) against their stress score (y-axis).
sns.stripplot(
    x=df["First Time Away From Home"],
    y=df["Stress Score"],
    size=3,
    jitter=0.3,
    alpha=0.5,
)
# Label each axis with the variable it actually shows (the two labels were
# previously swapped).
plt.xlabel("Staying away from home for first time?")
plt.ylabel("Stress Score")
plt.title("Effect of staying away from home for first time")
plt.show()
No description has been provided for this image

Those who stay away from home for the first time are more stressed

In [ ]:
# Bucket the 1-5 mental health rating into three labelled groups.
df["Mental Health Rating"] = df["Mental Health Rating"].replace(
    {
        1: "low_mentalhealth",
        2: "low_mentalhealth",
        3: "moderate_mentalhealth",
        4: "high_mentalhealth",
        5: "high_mentalhealth",
    }
)

# Contingency table: rows are family structures, columns are mental health
# groups, cells are respondent counts (0 where a combination never occurs).
group_sizes = df.groupby(["Family Structure", "Mental Health Rating"]).size()
pivot_table = group_sizes.unstack(fill_value=0)

plt.figure(figsize=(12, 6))
sns.heatmap(pivot_table, annot=True, fmt="d", cmap="viridis")
plt.title("Correlation between family type and mental health")
plt.xlabel("mental health")
plt.ylabel("family type")
plt.show()
No description has been provided for this image

inconclusive evidence

In [ ]:
# Leave out respondents whose gender is "Other": mark the value as missing,
# then drop every row that contains a missing value.
df["Gender"] = df["Gender"].replace("Other", np.nan)
df = df.dropna()

# Coping strategy chosen by each male / female respondent.
male_coping = df.loc[df["Gender"] == "Male", "Coping Strategies"]
female_coping = df.loc[df["Gender"] == "Female", "Coping Strategies"]

plt.figure(figsize=(10, 4))
# Opposite bar alignments place the male and female bars side by side around
# each strategy instead of on top of each other.
plt.hist(male_coping, bins=30, alpha=1, label="Male", align="left")
plt.hist(female_coping, bins=30, alpha=1, label="Female", align="right")
plt.title("Analysis of Coping Strategies")
plt.xlabel("escape method from stress")
plt.ylabel("Frequency")
plt.legend()
plt.show()
No description has been provided for this image

Insight: A large percentage of students prefer listening to music to get relief from stress. 33% of males and 38% of females prefer music as a way of escaping stress.

In [ ]: